In [1]:
# Import required libraries
import pandas as pd
import plotly.express as px
import plotly.io as pio
import plotly.graph_objects as go
import plotly.colors
from datetime import datetime
pio.templates.default = 'plotly_white'
In [2]:
# Read data from CSV file
data = pd.read_csv('rfm_data.csv')
data.head()
Out[2]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location
0 8814 2023-04-11 943.31 Product C 890075 Tokyo
1 2188 2023-04-11 463.70 Product A 176819 London
2 4608 2023-04-11 80.28 Product A 340062 New York
3 2559 2023-04-11 221.29 Product A 239145 London
4 9482 2023-04-11 739.56 Product A 194545 Paris
In [3]:
data.shape
Out[3]:
(1000, 6)
In [4]:
# Convert 'PurchaseDate' to datetime
data['PurchaseDate'] = pd.to_datetime(data['PurchaseDate'])
data.head()
Out[4]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location
0 8814 2023-04-11 943.31 Product C 890075 Tokyo
1 2188 2023-04-11 463.70 Product A 176819 London
2 4608 2023-04-11 80.28 Product A 340062 New York
3 2559 2023-04-11 221.29 Product A 239145 London
4 9482 2023-04-11 739.56 Product A 194545 Paris
In [5]:
# Calculate Recency
data['Recency'] = (datetime.now().date() - data['PurchaseDate'].dt.date).dt.days
data.head()
Out[5]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286
1 2188 2023-04-11 463.70 Product A 176819 London 286
2 4608 2023-04-11 80.28 Product A 340062 New York 286
3 2559 2023-04-11 221.29 Product A 239145 London 286
4 9482 2023-04-11 739.56 Product A 194545 Paris 286
In [6]:
# Calculate Frequency
frequency_data = data.groupby('CustomerID')['OrderID'].count().reset_index()
frequency_data.rename(columns = {'OrderID' : 'Frequency'}, inplace = True)
data = data.merge(frequency_data, on = 'CustomerID', how = 'left')
data.head()
Out[6]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1
1 2188 2023-04-11 463.70 Product A 176819 London 286 1
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1
3 2559 2023-04-11 221.29 Product A 239145 London 286 1
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1
In [7]:
# data.to_csv('rfm_output.csv')
In [8]:
# Calculate Monetary Value
monetary_data = data.groupby('CustomerID')['TransactionAmount'].sum().reset_index()
monetary_data.rename(columns = {'TransactionAmount' : 'MonetaryValue'}, inplace = True)
data = data.merge(monetary_data, on = 'CustomerID', how = 'left')
data.head()
Out[8]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56
In [9]:
# Define scoring criteria for each RFM value
recency_scores = [5, 4, 3, 2, 1] # Higher score for lower recency (more recent)
frequency_scores = [1, 2, 3, 4, 5] # Higher score for higher frequency
monetary_scores = [1, 2, 3, 4, 5] # Higher score for higher monetary value
data.head()
Out[9]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56
In [10]:
# Calculate RFM scores
data['RecencyScore'] = pd.cut(data['Recency'], bins = 5, labels = recency_scores)
data['FrequencyScore'] = pd.cut(data['Frequency'], bins = 5, labels = frequency_scores)
data['MonetaryScore'] = pd.cut(data['MonetaryValue'], bins = 5, labels = monetary_scores)
data.head()
Out[10]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2
In [11]:
# Convert RFM scores to numeric type
data['RecencyScore'] = data['RecencyScore'].astype(int)
data['FrequencyScore'] = data['FrequencyScore'].astype(int)
data['MonetaryScore'] = data['MonetaryScore'].astype(int)
data.head()
Out[11]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2
In [12]:
# data.to_csv('rfm_output.csv')
In [13]:
# Calculate RFM score by combining the individual scores
data['RFM_Score'] = data['RecencyScore'] + data['FrequencyScore'] + data['MonetaryScore']
data.head()
Out[13]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore RFM_Score
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2 4
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1 3
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1 3
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1 3
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2 4
In [14]:
# Create RFM segments based on RFM score
segment_labels = ['Low-Value', 'Mid-Value', 'High-Value']
data['Value Segment'] = pd.qcut(data['RFM_Score'], q = 3, labels = segment_labels)
data.head()
Out[14]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore RFM_Score Value Segment
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2 4 Low-Value
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1 3 Low-Value
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1 3 Low-Value
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1 3 Low-Value
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2 4 Low-Value
In [15]:
# data.to_csv('rfm_output.csv')
In [16]:
# RFM Segment Distribution
segment_counts = data['Value Segment'].value_counts().reset_index()
segment_counts.columns = ['Value Segment', 'Count']
data.head()
Out[16]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore RFM_Score Value Segment
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2 4 Low-Value
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1 3 Low-Value
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1 3 Low-Value
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1 3 Low-Value
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2 4 Low-Value
In [17]:
# Define the pastel colour pallete
pastel_colors = px.colors.qualitative.Pastel
In [18]:
# Create the bar chart with pastel colors
fig_segment_dist = px.bar(segment_counts, x = 'Value Segment', y = 'Count', color = 'Value Segment',
                          color_discrete_sequence = pastel_colors, title = 'RFM Value Segment Distribution')
In [19]:
# Update the layout
fig_segment_dist.update_layout(xaxis_title = 'RFM Value Segment', yaxis_title = 'Count', showlegend = False)
In [20]:
# Show the figure
fig_segment_dist.show()
In [21]:
# Create a new column for RFM customer segments
data['RFM Customer Segments'] = ''
data.head()
Out[21]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore RFM_Score Value Segment RFM Customer Segments
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2 4 Low-Value
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1 3 Low-Value
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1 3 Low-Value
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1 3 Low-Value
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2 4 Low-Value
In [22]:
# Assign RFM segments based on RFM score
data.loc[data['RFM_Score'] >= 9, 'RFM Customer Segments'] = 'Champions'
data.loc[(data['RFM_Score'] >= 6) & (data['RFM_Score'] < 9), 'RFM Customer Segments'] = 'Potential Loyalist'
data.loc[(data['RFM_Score'] >= 5) & (data['RFM_Score'] < 6), 'RFM Customer Segments'] = 'At Risk Customers'
data.loc[(data['RFM_Score'] >= 4) & (data['RFM_Score'] < 5), 'RFM Customer Segments'] = "Can't Lose"
data.loc[(data['RFM_Score'] >= 3) & (data['RFM_Score'] < 4), 'RFM Customer Segments'] = 'Lost'
data.head()
Out[22]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore RFM_Score Value Segment RFM Customer Segments
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2 4 Low-Value Can't Lose
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1 3 Low-Value Lost
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1 3 Low-Value Lost
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1 3 Low-Value Lost
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2 4 Low-Value Can't Lose
In [23]:
# Create a column Segment Product Count
segment_product_counts = data.groupby(['Value Segment', 'RFM Customer Segments']).size().reset_index(name = 'Count')
segment_product_counts = segment_product_counts.sort_values('Count', ascending = False)
data.head()
Out[23]:
CustomerID PurchaseDate TransactionAmount ProductInformation OrderID Location Recency Frequency MonetaryValue RecencyScore FrequencyScore MonetaryScore RFM_Score Value Segment RFM Customer Segments
0 8814 2023-04-11 943.31 Product C 890075 Tokyo 286 1 943.31 1 1 2 4 Low-Value Can't Lose
1 2188 2023-04-11 463.70 Product A 176819 London 286 1 463.70 1 1 1 3 Low-Value Lost
2 4608 2023-04-11 80.28 Product A 340062 New York 286 1 80.28 1 1 1 3 Low-Value Lost
3 2559 2023-04-11 221.29 Product A 239145 London 286 1 221.29 1 1 1 3 Low-Value Lost
4 9482 2023-04-11 739.56 Product A 194545 Paris 286 1 739.56 1 1 2 4 Low-Value Can't Lose
In [24]:
# Create a treemap with pastel colors
fig_treemap_segment_product = px.treemap(segment_product_counts,
                                        path = ['Value Segment', 'RFM Customer Segments'],
                                        values = 'Count',
                                        color = 'Value Segment', color_discrete_sequence = px.colors.qualitative.Pastel,
                                        title = 'RFM Customer Segments by Value')
C:\Users\Mahesh S Valanju\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

C:\Users\Mahesh S Valanju\anaconda3\lib\site-packages\plotly\express\_core.py:1637: FutureWarning:

The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.

In [25]:
# Show the figure
fig_treemap_segment_product.show()
In [26]:
# Filter the data to include only the customers in the champions segment
champions_segment = data[data['RFM Customer Segments'] == 'Champions']
In [27]:
# Create the box plot
champions_segment_fig = go.Figure()
champions_segment_fig.add_trace(go.Box(y = champions_segment['RecencyScore'], name = 'Recency'))
champions_segment_fig.add_trace(go.Box(y = champions_segment['FrequencyScore'], name = 'Frequency'))
champions_segment_fig.add_trace(go.Box(y = champions_segment['MonetaryScore'], name = 'Monetary'))
In [28]:
# Update the layout
champions_segment_fig.update_layout(title = 'Distribution of RFM Values within Champions Segment',
                                   yaxis_title = 'RFM Value', showlegend = True)
In [29]:
# Show the figure
champions_segment_fig.show()
In [30]:
# Correlation Matrix
correlation_matrix = champions_segment[['RecencyScore', 'FrequencyScore', 'MonetaryScore']].corr()
In [31]:
# Visualize the correlation matrix using a heatmap
fig_corr_heatmap = go.Figure(data = go.Heatmap(
                    z = correlation_matrix.values,
                    x = correlation_matrix.columns,
                    y = correlation_matrix.columns,
                    colorscale = 'RdBu',
                    colorbar = dict(title = 'Correlation')))
In [32]:
# Update layout
fig_corr_heatmap.update_layout(title = 'Correlation Matrix of RFM Values Within Champions Segment')
In [33]:
# Show the figure
fig_corr_heatmap.show()
In [34]:
# Define the pastel colour pallete
pastel_colors = plotly.colors.qualitative.Pastel
In [35]:
# Customer segment counts
segment_counts = data['RFM Customer Segments'].value_counts()
In [36]:
# Create a bar chart to compare segment counts
comparison_fig = go.Figure(data = [go.Bar(x = segment_counts.index, y = segment_counts.values,
                                         marker = dict(color = pastel_colors))])
In [37]:
# Set the color of the champions segment as a different color
champions_color = 'rgb(158, 202, 225)'
comparison_fig.update_traces(marker_color = [champions_color if segment == 'Champions' else pastel_colors[i]
                                             for i, segment in enumerate(segment_counts.index)],
                             marker_line_color = 'rgb(8, 48, 107)',
                             marker_line_width = 1.5, opacity = 0.6)
In [38]:
# Update the layout
comparison_fig.update_layout(title = 'Comparison of RFM Segments',
                            xaxis_title = 'RFM Segments',
                            yaxis_title = 'Number of Customers',
                            showlegend = False)
In [39]:
# Show the figure
comparison_fig.show()
In [40]:
# Calculate the average Recency, Frequency and Monetary scores for each segment
segment_scores = data.groupby('RFM Customer Segments')['RecencyScore', 'FrequencyScore', 'MonetaryScore'].mean().reset_index()
C:\Users\Mahesh S Valanju\AppData\Local\Temp\ipykernel_23128\56607270.py:2: FutureWarning:

Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

In [41]:
# Create a grouped bar chart to compare segment scores
fig = go.Figure()
In [42]:
# Add bars for Recency score
fig.add_trace(go.Bar(x = segment_scores['RFM Customer Segments'],
                    y = segment_scores['RecencyScore'],
                    name = 'Recency Score',
                    marker_color = 'rgb(158, 202, 225)'))
In [43]:
# Add bars for Frequency score
fig.add_trace(go.Bar(x = segment_scores['RFM Customer Segments'],
                    y = segment_scores['FrequencyScore'],
                    name = 'Frequency Score',
                    marker_color = 'rgb(94, 158, 217)'))
In [44]:
# Add bars for Monetary score
fig.add_trace(go.Bar(x = segment_scores['RFM Customer Segments'],
                    y = segment_scores['MonetaryScore'],
                    name = 'Monetary Score',
                    marker_color = 'rgb(32, 102, 148)'))
In [45]:
# Update the layout
fig.update_layout(title = 'Comparison of RFM Segments based on Recency, Frequency and Monetary Scores',
                 xaxis_title = 'RFM Segments',
                 yaxis_title = 'Score',
                 barmode = 'group',
                 showlegend = True)
In [46]:
# Show the figure
fig.show()
In [47]:
# Import required libraries for dahboard
import dash
from dash import dcc, html
from dash.dependencies import Input, Output
import plotly.io as pio
import plotly.colors as pc
In [48]:
# Initialize the Dash app
app = dash.Dash(__name__)
In [49]:
# Define the app layout using Bootstrap components
app.layout = html.Div([
    html.H1('RFM Analysis Dashboard', className = 'text-center mb-4'),
    html.Div('Analyze Customer Segments Based on RFM Scores.', className = 'text-center mb-4'),
    
    # Dropdown for selecting the chart
    dcc.Dropdown(
    id = 'chart-type-dropdown',
    options = [
        {'label' : 'RFM Value Segment Distribution', 'value' : 'segment_distribution'},
        {'label' : 'Distribution of RFM Values within Customer Segment', 'value' : 'RFM_distribution'},
        {'label' : 'Correlation Matrix of RFM Values within Champions Segment', 'value' : 'correlation_matrix'},
        {'label' : 'Comparison of RFM Segments', 'value' : 'segment_comparison'},
        {'label' : 'Comparison of RFM Segments based on Scores', 'value' : 'segment_scores'},
    ],
    value = 'segment_distribution', # Default selection
    className = 'mb-4',
    ),
    # Graph container
    dcc.Graph(id = 'rfm-chart', className = 'mb-4')
])
In [50]:
# Define callback to update selected chart
@app.callback(
    Output('rfm-chart', 'figure'),
    [Input('chart-type-dropdown', 'value')]
)

def update_chart(selected_chart_type):
    if selected_chart_type == 'segment_distribution':
        return fig_segment_dist
    elif selected_chart_type == 'RFM_distribution':
        return fig_treemap_segment_product
    elif selected_chart_type == 'correlation_matrix':
        return fig_corr_heatmap
    elif selected_chart_type == 'segment_comparison':
        return comparison_fig
    elif selected_chart_type == 'segment_scores':
        return fig
    
    # Return a default chart if no valid selection
    return fig_segment_dist

if __name__ == '__main__':
    app.run_server(port=8052)